This is an R Markdown Notebook associated with the following manuscript: A recombinant mapping yeast population identifies mitotic growth loci influencing mtDNA instability through mitonuclear interactions Tuc H.M. Nguyen, Austen Tinz-Burdick, Meghan Lenhardt, Margaret Geertz, Franchesca Ramirez, Mark Schwartz, Michael Toledano1, Brooke Bonney, Benjamin Gaebler, Weiwei Liu, John F. Wolters, Ken Chiu, Anthony C. Fiumera, Heather L. Fiumera

Final figures edited in Inkscape.

libraries<-c("car", "dplyr", "ggplot2", "ggpubr", "multcomp", "multcompView","tidyverse", "mixlm")
lapply(libraries, require, character.only = TRUE )
## Loading required package: car
## Loading required package: carData
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggplot2
## Loading required package: ggpubr
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
## Loading required package: multcompView
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.6     ✔ purrr   0.3.4
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.1     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ MASS::select()  masks dplyr::select()
## ✖ purrr::some()   masks car::some()
## Loading required package: mixlm
## 
## Attaching package: 'mixlm'
## The following object is masked from 'package:dplyr':
## 
##     tally
## The following objects are masked from 'package:stats':
## 
##     glm, lm
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE
## 
## [[7]]
## [1] TRUE
## 
## [[8]]
## [1] TRUE
import data for Fig 1 and Table S2, set factors and classes
#import data
data.wildisolates<-read.csv("parental_pet.csv", header = TRUE, stringsAsFactors = FALSE)
data.S288c<-read.csv("S288c_pet.csv", header = TRUE, stringsAsFactors = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on 'S288c_pet.csv'
#set factors
data.wildisolates$StrainID<-as.factor(data.wildisolates$StrainID)
data.wildisolates$Nuclear<-as.factor(data.wildisolates$Nuclear)
data.wildisolates$NucPop<-as.factor(data.wildisolates$NucPop)
data.wildisolates$Mito<-as.factor(data.wildisolates$Mito)
data.wildisolates$MitoPop<-as.factor(data.wildisolates$MitoPop)
data.wildisolates$og_v_syn<-as.factor(data.wildisolates$og_v_syn)
data.wildisolates$Genotype<-as.factor(data.wildisolates$Genotype)
#set classes
data.wildisolates$Petite<-as.numeric(data.wildisolates$Petite)
data.wildisolates$Grande<-as.numeric(data.wildisolates$Grande)
data.wildisolates$Total<-as.numeric(data.wildisolates$Total)

Fig 1

Genetic variation contributes to mtDNA loss.

Petite frequencies of S. cerevisiae wild isolates as boxplots. The populations as broadly defined in PMCID: 2659681, 6784862, and 4417123

#reorganize isolates for a certain order
data.wildisolates$Nuclear <- factor(data.wildisolates$Nuclear, levels=c("378604X","273614N","322134S","YJM981","YJM975","YJM978","BC187","DBVPG1373","L-1528","L-1374","DBVPG6765","YPS606","YPS128","UWOPS03-461.4","UWOPS05-227.2","UWOPS05-217.3","Y12","Y55","SK1","NCYC110","DBVPG6044"))
#set colorblind palette
cbbPalette2 <- c("WestAfrican" = "#E69F00","Sake" ="#D55E00","Malaysian"= "#CC79A7", "NorthAmerican"="#009E73","European"="#56B4E9" ,"mosaic"="#999999")

Fig1<-ggplot(data.wildisolates,aes(x=Nuclear,y=PetFreq,fill=NucPop))+geom_boxplot()+
  theme_bw()+ 
  theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
  theme(axis.line = element_line(color = 'black'))+
  theme(axis.text.x = element_text(size = 10)) +scale_fill_manual(values=cbbPalette2)+
  scale_y_continuous(breaks=seq(0,12,2))+
  theme(legend.position = c(0.8,0.5))+
  guides(fill=guide_legend(title="Population"))+
  coord_flip()
Fig1

Figure 1 insert for S288c

#added as an insert in Inkscape
#plotted in the same x axis spacing as above 
bp_S288c<-ggplot(data.S288c,aes(x=Nuclear,y=PetFreq,fill=NucPop))+geom_boxplot()+
  theme_bw()+ 
  theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
  theme(axis.line = element_line(color = 'black'))+
  theme(axis.text.x = element_text(size = 10)) +scale_fill_manual(values=cbbPalette2)+
  ylim(42,60)+
  theme(legend.position = "none") +coord_flip()
bp_S288c

#### Table S2: ANOVA #### Petite frequencies vary by strain and population

Are the differences in petite frequencies due to genetic variation in individuals isolates and/or populations?

#The different numbers of petite/grande colonies counted in each assay are taken into account using cbind function
#the family=binomial accounts for non-normal distribution of petite frequencies
#the variable "Nuclear" is used instead of "StrainID" because there are both mata and x versions of some strains

FM.cbind<-glm(cbind(data.wildisolates$Petite,data.wildisolates$Grande)~data.wildisolates$NucPop/data.wildisolates$Nuclear, family=binomial)

anova(FM.cbind, test="Chisq")

Petite frequency averages for each population

pop.means<-data.frame(tapply(predict(FM.cbind, type = "response"), data.wildisolates$NucPop, mean))
pop.means

Fig 2A,B, Table S3

Mitotype influences on petite frequencies are influenced by M4 GC clusters

This section contains the analyses described in Figure 2A and B and Table S3.

Analyzes the petite frequencies of strains with the Y55 nuclear genotype with different mtDNAs

import data and set factors
data.Y55<-read.csv("data.Y55.csv", header = TRUE, stringsAsFactors = FALSE)
#set factors
data.Y55$StrainID<-as.factor(data.Y55$StrainID)
data.Y55$Nuclear<-as.factor(data.Y55$Nuclear)
data.Y55$NucPop<-as.factor(data.Y55$NucPop)
data.Y55$Mito<-as.factor(data.Y55$Mito)
data.Y55$MitoPop<-as.factor(data.Y55$MitoPop)
data.Y55$og_v_syn<-as.factor(data.Y55$og_v_syn)
data.Y55$Genotype<-as.factor(data.Y55$Genotype)
#set classes
data.Y55$Petite<-as.numeric(data.Y55$Petite)
data.Y55$Grande<-as.numeric(data.Y55$Grande)
data.Y55$Total<-as.numeric(data.Y55$Total)

Petite frequencies averages for each genotype

aggregate(PetFreq~Mito,data=data.Y55,mean)

Table S3: ANOVA

mtDNAs influence petite frequencies in the Y55 nuclear background
#Table S3 anova... Does mtDNA variation contribute to petite frequencies?
FM.cbind2<-glm(cbind(data.Y55$Petite,data.Y55$Grande)~data.Y55$Mito, family=binomial)
anova(FM.cbind2, test="Chisq")

Correlations between mtDNA features and petite frequencies

mtDNA analyses and correlations

This sections contains the analyses for Fig 2A,2B and correlations described in text

import and prepare data
data.mtDNA_full<-read.table("nY55_pet_gc_intron_3.txt", header = TRUE, stringsAsFactors = FALSE)
data.mtDNA_full$log2_pm<-log2(data.mtDNA_full$petfreq_mean)
#subset data to just those that have pet freq in the Y55 mtDNA for analyzed mtDNAs
data.mtDNA_Y55nuc<-data.mtDNA_full%>%dplyr::select(petfreq_mean,log2_pm, mtDNA,mtPop,Length,GC_percent,TotalGCcount,total_intronlength,M1,M2,M3,M4,M1p,M2p,M2pp,G,V,U)
#remove rows with missing values
  data.mtDNA_Y55nuc_complete<-data.mtDNA_Y55nuc[complete.cases(data.mtDNA_Y55nuc),]
#test to see if normally distributed
  hist(data.mtDNA_Y55nuc_complete$log2_pm)

  shapiro.test(data.mtDNA_Y55nuc_complete$log2_pm)
## 
##  Shapiro-Wilk normality test
## 
## data:  data.mtDNA_Y55nuc_complete$log2_pm
## W = 0.9448, p-value = 0.3796

Fig 2A

Plot

Plots the petite frequencies of strains with the Y55 nuclear genotype and different mtDNAs vs. mtDNA GC content

cols2<-c("WineEuro"= "blue","Malaysian" ="#E69F00","mosaic"="#999999", "NthAm"="#009E73", "WestAfr" = "#F0E442", "Sake" = "#D55E00")
Fig2A<-ggplot(data=data.mtDNA_Y55nuc_complete, aes(x = GC_percent, y =petfreq_mean)) + 
                geom_point(aes(color=mtPop),size=3.5) +
                scale_color_manual(values=cols2)+
                theme_bw()+ 
                theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
                theme(axis.line = element_line(color = 'black'))+
                theme(axis.text.x = element_text(size = 10))+
                theme(legend.position = "none")+
                geom_smooth(method = "lm", se = FALSE, color="black")+
                labs(x="mtDNA GC%",y="PetFreq")+theme(legend.position="none") 
Fig2A
## `geom_smooth()` using formula 'y ~ x'

#### Correlation for Fig 2A: Correlation between petite frequency and GC percent

RM.GCp<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$GC_percent)
        anova(RM.GCp) #GC_percent p =0.01341 *
        summary(RM.GCp) #R^2 = 0.3435
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$GC_percent)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2315  -2.2226  -0.3326   1.2660  13.2191 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                            -63.582     26.897  -2.364   0.0320 *
## data.mtDNA_Y55nuc_complete$GC_percent    4.626      1.651   2.802   0.0134 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 5.656 on 15 degrees of freedom
## Multiple R-squared: 0.3435,
## Adjusted R-squared: 0.2998 
## F-statistic:  7.85 on 1 and 15 DF,  p-value: 0.01341
        Anova(RM.GCp) #GC_percent p =0.01341
        cor.test.gcppet<-cor.test(data.mtDNA_Y55nuc_complete$GC_percent,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.gcppet #r=0.5861
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$GC_percent and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 2.8017, df = 15, p-value = 0.01341
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1468400 0.8322935
## sample estimates:
##       cor 
## 0.5861178
        cor.test.gcppet$p.value #GCpercent p = 0.01341*
## [1] 0.01341314
        cor.test.gcppet.p<-round(cor.test.gcppet$p.value,3)
        cor.test.gcppet.p #p=0.013 *
## [1] 0.013
        cor.test.gcppet$estimate
##       cor 
## 0.5861178
        cor.test.gcpet.r<-round(cor.test.gcppet$estimate)
        cor.test.gcpet.r
## cor 
##   1
        gcppet.r<-round(cor.test.gcppet$estimate,2)
        gcppet.r2<-round(cor.test.gcppet$estimate^2,2)
        gcppet.r
##  cor 
## 0.59
        gcppet.r2
##  cor 
## 0.34

Fig 2B

Plot

petite frequency vs. M4 clusters

Plots the petite frequencies of strains with the Y55 nuclear genotype and different mtDNAs vs. the numbers of M4-type GC clusters

Fig2B<-ggplot(data=data.mtDNA_Y55nuc_complete, aes(x = M4, y =petfreq_mean)) + 
                geom_point(aes(color=mtPop),size=3.5) +
                scale_color_manual(values=cols2)+
                theme_bw()+ 
                theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
                theme(axis.line = element_line(color = 'black'))+
                theme(axis.text.x = element_text(size = 10))+
                theme(legend.position = "none")+
                geom_smooth(method = "lm", se = FALSE, color="black")+
                labs(x="total M4-type GC clusters",y="PetFreq")+theme(legend.position="none")
Fig2B
## `geom_smooth()` using formula 'y ~ x'

Correlation for Fig 2B

Correlation between petite frequency and M4 cluster numbers

RM.M4<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M4)
        anova(RM.M4) #p= 0.003568 **
        summary(RM.M4) #R^2 = 0.4425
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.832 -4.068 -1.219  1.880 11.004 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     8.2824     1.6020   5.170 0.000114 ***
## data.mtDNA_Y55nuc_complete$M4   0.5345     0.1549   3.451 0.003568 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 5.212 on 15 degrees of freedom
## Multiple R-squared: 0.4425,
## Adjusted R-squared: 0.4053 
## F-statistic: 11.91 on 1 and 15 DF,  p-value: 0.003568
        Anova(RM.M4) #M4 p=
        cor.test.m4<-cor.test(data.mtDNA_Y55nuc_complete$M4,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m4 #r=0.0.66521
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M4 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 3.4505, df = 15, p-value = 0.003568
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2713116 0.8682496
## sample estimates:
##     cor 
## 0.66521
        cor.test.m4$p.value #M4 p =
## [1] 0.003568085
        cor.test.m4.p<-round(cor.test.m4$p.value,3)
        cor.test.m4.p #m4 p=0.004 **
## [1] 0.004
        m4.r<-round(cor.test.m4$estimate,2)
        m4.r2<-round(cor.test.m4$estimate^2,2)
        m4.r
##  cor 
## 0.67
        m4.r2
##  cor 
## 0.44
Other Correlations, some described in text

Do GC cluster numbers and GC% correlate with each other? (yes)

FM.gcc.gcp<-lm(data.mtDNA_Y55nuc_complete$GC_percent~data.mtDNA_Y55nuc_complete$TotalGCcount)
      anova(FM.gcc.gcp) 
      summary(FM.gcc.gcp) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$GC_percent ~ data.mtDNA_Y55nuc_complete$TotalGCcount)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4525 -0.2200  0.1045  0.1472  0.4147 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             11.861832   0.389217   30.48 6.59e-15
## data.mtDNA_Y55nuc_complete$TotalGCcount  0.026224   0.002279   11.51 7.66e-09
##                                            
## (Intercept)                             ***
## data.mtDNA_Y55nuc_complete$TotalGCcount ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 0.2822 on 15 degrees of freedom
## Multiple R-squared: 0.8982,
## Adjusted R-squared: 0.8915 
## F-statistic: 132.4 on 1 and 15 DF,  p-value: 7.657e-09
      cor.test.gccgcp<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$GC_percent)
      cor.test.gccgcp 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$GC_percent
## t = 11.506, df = 15, p-value = 7.657e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8579176 0.9813575
## sample estimates:
##      cor 
## 0.947753
      cor.test.gccgcp$p.value 
## [1] 7.657339e-09
      #yes, GC cluster numbers correlate with GC percent 

Does GC$ and total mtDNAlength correlate? (yes)

FM.gcpl<-lm(data.mtDNA_Y55nuc_complete$Length~data.mtDNA_Y55nuc_complete$GC_percent)
      anova(FM.gcpl)
      summary(FM.gcpl) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$Length ~ data.mtDNA_Y55nuc_complete$GC_percent)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4612.8 -2319.2  -496.2   983.8 10784.8 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                              35915      16714   2.149   0.0484 *
## data.mtDNA_Y55nuc_complete$GC_percent     2817       1026   2.746   0.0150 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 3515 on 15 degrees of freedom
## Multiple R-squared: 0.3345,
## Adjusted R-squared: 0.2901 
## F-statistic:  7.54 on 1 and 15 DF,  p-value: 0.01501
      cor.test.gcpl<-cor.test(data.mtDNA_Y55nuc_complete$GC_percent,data.mtDNA_Y55nuc_complete$Length)
      cor.test.gcpl 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$GC_percent and data.mtDNA_Y55nuc_complete$Length
## t = 2.7458, df = 15, p-value = 0.01501
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1353403 0.8286526
## sample estimates:
##       cor 
## 0.5783623
      cor.test.gcpl$p.value 
## [1] 0.01501181
      #yes, GC percent and mtDNA length correlate

Do GC cluster numbers (total) correlate with total mtDNA length? (yes)

 # do the total number of GC clusters correlate with mtDNA length?
      FM.gcc.length<-lm(data.mtDNA_Y55nuc_complete$Length~data.mtDNA_Y55nuc_complete$TotalGCcount)
      anova(FM.gcc.length) 
      summary(FM.gcc.length) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$Length ~ data.mtDNA_Y55nuc_complete$TotalGCcount)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4143.8 -1695.7  -462.5   631.3  8530.4 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             65960.88    4262.23  15.476 1.25e-10
## data.mtDNA_Y55nuc_complete$TotalGCcount    93.92      24.96   3.763  0.00188
##                                            
## (Intercept)                             ***
## data.mtDNA_Y55nuc_complete$TotalGCcount ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 3090 on 15 degrees of freedom
## Multiple R-squared: 0.4856,
## Adjusted R-squared: 0.4513 
## F-statistic: 14.16 on 1 and 15 DF,  p-value: 0.00188
      Anova(FM.gcc.length) 
      cor.test.gccl<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$Length)
      cor.test.gccl 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$Length
## t = 3.763, df = 15, p-value = 0.00188
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3250890 0.8820597
## sample estimates:
##       cor 
## 0.6968494
      cor.test.gccl$p.value
## [1] 0.001880282
      #yes, GC% correlates with total mtDNA length

Does total intron length correlate with total mtDNA length? (yes)

FM.ibp.length<-lm(data.mtDNA_Y55nuc_complete$Length~data.mtDNA_Y55nuc_complete$total_intronlength)
      anova(FM.ibp.length) 
      summary(FM.ibp.length) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$Length ~ data.mtDNA_Y55nuc_complete$total_intronlength)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3669.2 -1591.1   241.1   633.3  4658.8 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                   6.515e+04  2.381e+03  27.366
## data.mtDNA_Y55nuc_complete$total_intronlength 1.206e+00  1.691e-01   7.131
##                                               Pr(>|t|)    
## (Intercept)                                   3.22e-14 ***
## data.mtDNA_Y55nuc_complete$total_intronlength 3.45e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 2056 on 15 degrees of freedom
## Multiple R-squared: 0.7722,
## Adjusted R-squared: 0.757 
## F-statistic: 50.85 on 1 and 15 DF,  p-value: 3.446e-06
      Anova(FM.ibp.length) 
      cor.test.ibp<-cor.test(data.mtDNA_Y55nuc_complete$total_intronlength,data.mtDNA_Y55nuc_complete$Length)
      cor.test.ibp 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$total_intronlength and data.mtDNA_Y55nuc_complete$Length
## t = 7.1306, df = 15, p-value = 3.446e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6891919 0.9557260
## sample estimates:
##       cor 
## 0.8787463
      cor.test.ibp$p.value 
## [1] 3.445509e-06
      #yes, intron lengths correlate with total mtDNA length

Does total intron length correlate with total GC cluster count? (no)

FM.ibpgcc<-lm(data.mtDNA_Y55nuc_complete$total_intronlength~data.mtDNA_Y55nuc_complete$TotalGCcount)
      anova(FM.ibpgcc) 
      summary(FM.ibpgcc) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$total_intronlength ~ 
##     data.mtDNA_Y55nuc_complete$TotalGCcount)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4203.3 -1094.0  -476.1   536.8  8572.3 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                              6505.79    3890.43   1.672   0.1152  
## data.mtDNA_Y55nuc_complete$TotalGCcount    43.21      22.78   1.897   0.0773 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 2820 on 15 degrees of freedom
## Multiple R-squared: 0.1934,
## Adjusted R-squared: 0.1397 
## F-statistic: 3.597 on 1 and 15 DF,  p-value: 0.0773
      cor.test.ibpgcc<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$total_intronlength)
      cor.test.ibpgcc
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$total_intronlength
## t = 1.8967, df = 15, p-value = 0.0773
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05178294  0.75983117
## sample estimates:
##       cor 
## 0.4398083
      cor.test.ibpgcc$p.value 
## [1] 0.07729987
      #no, the total intron length does not correlate with GC cluster count

Does total intron length correlate with total GC percent? (no)

FM.ibpgcp<-lm(data.mtDNA_Y55nuc_complete$total_intronlength~data.mtDNA_Y55nuc_complete$GC_percent)
      anova(FM.ibpgcp) 
      summary(FM.ibpgcp) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$total_intronlength ~ 
##     data.mtDNA_Y55nuc_complete$GC_percent)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3369.4 -1458.6  -822.5   211.3  9813.5 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            -3313.0    14267.1  -0.232    0.820
## data.mtDNA_Y55nuc_complete$GC_percent   1049.9      875.7   1.199    0.249
## 
## s: 3000 on 15 degrees of freedom
## Multiple R-squared: 0.08745,
## Adjusted R-squared: 0.02661 
## F-statistic: 1.437 on 1 and 15 DF,  p-value: 0.2492
      cor.test.ibpgcp<-cor.test(data.mtDNA_Y55nuc_complete$GC_percent,data.mtDNA_Y55nuc_complete$total_intronlength)
      cor.test.ibpgcp 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$GC_percent and data.mtDNA_Y55nuc_complete$total_intronlength
## t = 1.1989, df = 15, p-value = 0.2492
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2155721  0.6797435
## sample estimates:
##       cor 
## 0.2957125
      cor.test.ibpgcp$p.value 
## [1] 0.2491614
      #no, the total intron length does not correlate with GC cluster count

Do petite frequencies correlate with total GC cluster numbers? (no)

 #Pet vs TotalGC cluster
        RM.GCcluster<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$TotalGCcount)
        anova(RM.GCcluster) 
        summary(RM.GCcluster) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$TotalGCcount)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1392  -2.8345  -1.3316   0.7767  15.9829 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                             -4.63806    8.62552  -0.538   0.5987  
## data.mtDNA_Y55nuc_complete$TotalGCcount  0.09705    0.05051   1.922   0.0739 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.253 on 15 degrees of freedom
## Multiple R-squared: 0.1975,
## Adjusted R-squared: 0.144 
## F-statistic: 3.692 on 1 and 15 DF,  p-value: 0.07387
        Anova(RM.GCcluster) 
        cor.test.gcc<-cor.test(data.mtDNA_Y55nuc_complete$TotalGCcount,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.gcc 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$TotalGCcount and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.9216, df = 15, p-value = 0.07387
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04602562  0.76225970
## sample estimates:
##       cor 
## 0.4444513
        cor.test.gcc$p.value 
## [1] 0.07386781
        cor.test.gcc.p<-round(cor.test.gcc$p.value,3)
        cor.test.gcc.p 
## [1] 0.074
        #no, petite frequency does not correlate with total GC cluster numbers

Do petite frequencies correlate with total mtDNA length? (no)

  #Pet vs. Length
        RM.Length<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$Length)
        anova(RM.Length) 
        summary(RM.Length) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$Length)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.039 -3.748 -1.223  2.244 19.543 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       -4.7972552 33.9761071  -0.141    0.890
## data.mtDNA_Y55nuc_complete$Length  0.0002015  0.0004151   0.486    0.634
## 
## s: 6.926 on 15 degrees of freedom
## Multiple R-squared: 0.01547,
## Adjusted R-squared: -0.05016 
## F-statistic: 0.2357 on 1 and 15 DF,  p-value: 0.6343
        Anova(RM.Length) 
        cor.test.tbp<-cor.test(data.mtDNA_Y55nuc_complete$Length,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.tbp 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$Length and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 0.48551, df = 15, p-value = 0.6343
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3789134  0.5708984
## sample estimates:
##       cor 
## 0.1243846
        cor.test.tbp$p.value 
## [1] 0.6343297
        cor.test.tbp.p<-round(cor.test.tbp$p.value,3)
        cor.test.tbp.p 
## [1] 0.634
        #no, petite frequency does not correlate with total mtDNA length

Do petite frequencies correlate with intron lengths? (no)

        #Pet vs. Intron Length
        RM.intronbp<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$total_intronlength)
        anova(RM.intronbp) 
        summary(RM.intronbp) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$total_intronlength)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.701 -4.601 -1.705  2.306 19.146 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                   14.4735530  8.0481879   1.798
## data.mtDNA_Y55nuc_complete$total_intronlength -0.0002030  0.0005715  -0.355
##                                               Pr(>|t|)  
## (Intercept)                                     0.0923 .
## data.mtDNA_Y55nuc_complete$total_intronlength   0.7274  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.951 on 15 degrees of freedom
## Multiple R-squared: 0.008341,
## Adjusted R-squared: -0.05777 
## F-statistic: 0.1262 on 1 and 15 DF,  p-value: 0.7274
        Anova(RM.intronbp)
        cor.test.ibppet<-cor.test(data.mtDNA_Y55nuc_complete$total_intronlength,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.ibppet 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$total_intronlength and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -0.35519, df = 15, p-value = 0.7274
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5479205  0.4071916
## sample estimates:
##         cor 
## -0.09132729
        cor.test.ibppet$p.value 
## [1] 0.7273897
        #no, total intron length does not correlate with petfreq

Do petite frequencies correlate with specific GC cluster types? (only M4 class)

        #M1 cluster vs. petfreq_mean
        RM.M1<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M1)
        anova(RM.M1) 
        summary(RM.M1) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.029 -2.934 -1.671  1.023 17.761 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     2.6043     7.8113   0.333    0.743
## data.mtDNA_Y55nuc_complete$M1   0.2054     0.1730   1.187    0.254
## 
## s: 6.674 on 15 degrees of freedom
## Multiple R-squared: 0.08592,
## Adjusted R-squared: 0.02498 
## F-statistic:  1.41 on 1 and 15 DF,  p-value: 0.2535
        Anova(RM.M1) 
        cor.test.m1<-cor.test(data.mtDNA_Y55nuc_complete$M1,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m1 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M1 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.1874, df = 15, p-value = 0.2535
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2182716  0.6782166
## sample estimates:
##       cor 
## 0.2931252
        cor.test.m1$p.value 
## [1] 0.253523
        cor.test.m1.p<-round(cor.test.m1$p.value,3)
        cor.test.m1.p 
## [1] 0.254
        #not significant
        
        #M2 cluster vs. petfreq_mean
        RM.M2<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M2)
        anova(RM.M2) 
        summary(RM.M2) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.5515  -3.6054  -1.3358   0.7997  17.5870 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     3.3652     6.2248   0.541    0.597
## data.mtDNA_Y55nuc_complete$M2   0.2969     0.2149   1.382    0.187
## 
## s: 6.575 on 15 degrees of freedom
## Multiple R-squared: 0.1129,
## Adjusted R-squared: 0.05374 
## F-statistic: 1.909 on 1 and 15 DF,  p-value: 0.1873
        Anova(RM.M2)
        cor.test.m2<-cor.test(data.mtDNA_Y55nuc_complete$M2,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m2
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M2 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.3816, df = 15, p-value = 0.1873
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1725210  0.7030879
## sample estimates:
##      cor 
## 0.335984
        cor.test.m2$p.value
## [1] 0.1873353
        cor.test.m2.p<-round(cor.test.m2$p.value,3)
        cor.test.m2.p 
## [1] 0.187
        #not significant
        
        #M3 cluster vs. petfreq_mean
        RM.M3<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M3)
        anova(RM.M3) 
        summary(RM.M3) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.681 -3.830 -1.432  1.498 17.082 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     1.4498     8.9812   0.161    0.874
## data.mtDNA_Y55nuc_complete$M3   0.6587     0.5688   1.158    0.265
## 
## s: 6.688 on 15 degrees of freedom
## Multiple R-squared: 0.08205,
## Adjusted R-squared: 0.02085 
## F-statistic: 1.341 on 1 and 15 DF,  p-value: 0.265
        Anova(RM.M3) 
        cor.test.m3<-cor.test(data.mtDNA_Y55nuc_complete$M3,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m3
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M3 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.1579, df = 15, p-value = 0.265
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2252060  0.6742588
## sample estimates:
##       cor 
## 0.2864447
        cor.test.m3$p.value  
## [1] 0.2650034
        cor.test.m3.p<-round(cor.test.m3$p.value,3)
        cor.test.m3.p 
## [1] 0.265
        #not significant
        
        #M4 vs. pet freq
        RM.M4<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M4)
        anova(RM.M4) 
        summary(RM.M4) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.832 -4.068 -1.219  1.880 11.004 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     8.2824     1.6020   5.170 0.000114 ***
## data.mtDNA_Y55nuc_complete$M4   0.5345     0.1549   3.451 0.003568 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 5.212 on 15 degrees of freedom
## Multiple R-squared: 0.4425,
## Adjusted R-squared: 0.4053 
## F-statistic: 11.91 on 1 and 15 DF,  p-value: 0.003568
        Anova(RM.M4) 
        cor.test.m4<-cor.test(data.mtDNA_Y55nuc_complete$M4,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m4 
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M4 and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 3.4505, df = 15, p-value = 0.003568
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2713116 0.8682496
## sample estimates:
##     cor 
## 0.66521
        cor.test.m4$p.value 
## [1] 0.003568085
        cor.test.m4.p<-round(cor.test.m4$p.value,3)
        cor.test.m4.p 
## [1] 0.004
        #significant ** Fig 2B
        
        #M1p vs. petfreq_mean
        #RAW
        RM.M1p<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M1p)
        anova(RM.M1p)
        summary(RM.M1p) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M1p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3068 -2.2128 -0.7875  3.7525 15.9792 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     18.0221     3.3812    5.33 8.41e-05 ***
## data.mtDNA_Y55nuc_complete$M1p  -1.4773     0.7068   -2.09   0.0541 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.143 on 15 degrees of freedom
## Multiple R-squared: 0.2255,
## Adjusted R-squared: 0.1739 
## F-statistic: 4.368 on 1 and 15 DF,  p-value: 0.05406
        Anova(RM.M1p) 
        cor.test.m1p<-cor.test(data.mtDNA_Y55nuc_complete$M1p,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m1p
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M1p and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -2.09, df = 15, p-value = 0.05406
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.777970978  0.007434686
## sample estimates:
##        cor 
## -0.4749071
        cor.test.m1p$p.value 
## [1] 0.05405844
        cor.test.m1p.p<-round(cor.test.m1p$p.value,3)
        cor.test.m1p.p 
## [1] 0.054
        #not significant
        
        #M2p cluster vs. petfreq_mean
        RM.M2p<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M2p)
        anova(RM.M2p) 
        summary(RM.M2p) 
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M2p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.136  -3.557  -1.365   0.624  18.184 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)
## (Intercept)                      6.1514     5.2752   1.166    0.262
## data.mtDNA_Y55nuc_complete$M2p   0.3196     0.2901   1.101    0.288
## 
## s: 6.714 on 15 degrees of freedom
## Multiple R-squared: 0.07483,
## Adjusted R-squared: 0.01316 
## F-statistic: 1.213 on 1 and 15 DF,  p-value: 0.288
        Anova(RM.M2p)
        cor.test.m2p<-cor.test(data.mtDNA_Y55nuc_complete$M2p,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m2p
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M2p and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 1.1015, df = 15, p-value = 0.288
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2384390  0.6665597
## sample estimates:
##       cor 
## 0.2735565
        cor.test.m2p$p.value 
## [1] 0.2880426
        cor.test.m2p.p<-round(cor.test.m2p$p.value,3)
        cor.test.m2p.p
## [1] 0.288
        #not significant
        
        #M2pp cluster vs. petfreq_mean
        RM.M2pp<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$M2pp)
        anova(RM.M2pp)
        summary(RM.M2pp)
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$M2pp)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.659 -4.026 -2.131  2.438 19.016 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      15.2963     6.4719   2.363    0.032 *
## data.mtDNA_Y55nuc_complete$M2pp  -0.5443     0.9405  -0.579    0.571  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.904 on 15 degrees of freedom
## Multiple R-squared: 0.02184,
## Adjusted R-squared: -0.04337 
## F-statistic: 0.3349 on 1 and 15 DF,  p-value: 0.5714
        Anova(RM.M2pp) 
        cor.test.m2pp<-cor.test(data.mtDNA_Y55nuc_complete$M2pp,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.m2pp
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$M2pp and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -0.57873, df = 15, p-value = 0.5714
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5867527  0.3583099
## sample estimates:
##        cor 
## -0.1477866
        cor.test.m2pp$p.value
## [1] 0.5713597
        cor.test.m2pp.p<-round(cor.test.m2pp$p.value,3)
        cor.test.m2pp.p
## [1] 0.571
        #G cluster vs. petfreq_mean
        RM.G<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$G)
        anova(RM.G)
        summary(RM.G)
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$G)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.160 -3.697 -1.655  1.655 19.215 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    9.2204     3.3888   2.721   0.0158 *
## data.mtDNA_Y55nuc_complete$G   0.8705     1.0473   0.831   0.4189  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.825 on 15 degrees of freedom
## Multiple R-squared: 0.04403,
## Adjusted R-squared: -0.0197 
## F-statistic: 0.6909 on 1 and 15 DF,  p-value: 0.4189
        Anova(RM.G) 
        cor.test.G<-cor.test(data.mtDNA_Y55nuc_complete$G,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.G
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$G and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 0.83122, df = 15, p-value = 0.4189
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3011804  0.6272246
## sample estimates:
##       cor 
## 0.2098411
        cor.test.G$p.value 
## [1] 0.418886
        cor.test.G.p<-round(cor.test.G$p.value,3)
        cor.test.G.p
## [1] 0.419
        #not significant
        
        #V cluster vs. petfreq_mean
        RM.V<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$V)
        anova(RM.V)
        summary(RM.V)
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$V)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6878 -5.0978 -0.6324  0.5156 17.2823 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    19.675      6.762   2.910   0.0108 *
## data.mtDNA_Y55nuc_complete$V   -2.955      2.427  -1.218   0.2421  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.659 on 15 degrees of freedom
## Multiple R-squared: 0.08997,
## Adjusted R-squared: 0.02931 
## F-statistic: 1.483 on 1 and 15 DF,  p-value: 0.2421
        Anova(RM.V) 
        cor.test.V<-cor.test(data.mtDNA_Y55nuc_complete$V,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.V
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$V and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = -1.2178, df = 15, p-value = 0.2421
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6822409  0.2111270
## sample estimates:
##        cor 
## -0.2999565
        cor.test.V$p.value 
## [1] 0.2421096
        cor.test.V.p<-round(cor.test.V$p.value,3)
        cor.test.V.p
## [1] 0.242
        #not significant
        
        #U cluster vs. petfreq_mean
        RM.U<-lm(data.mtDNA_Y55nuc_complete$petfreq_mean~data.mtDNA_Y55nuc_complete$U)
        anova(RM.U)
        summary(RM.U)
## 
## Call:
## lm(formula = data.mtDNA_Y55nuc_complete$petfreq_mean ~ data.mtDNA_Y55nuc_complete$U)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.043  -3.947   0.541   1.907  17.689 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                  -27.9835    19.3727  -1.444   0.1692  
## data.mtDNA_Y55nuc_complete$U   0.9843     0.4793   2.053   0.0579 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.167 on 15 degrees of freedom
## Multiple R-squared: 0.2194,
## Adjusted R-squared: 0.1674 
## F-statistic: 4.217 on 1 and 15 DF,  p-value: 0.0579
        Anova(RM.U)
        cor.test.U<-cor.test(data.mtDNA_Y55nuc_complete$U,data.mtDNA_Y55nuc_complete$petfreq_mean)
        cor.test.U
## 
##  Pearson's product-moment correlation
## 
## data:  data.mtDNA_Y55nuc_complete$U and data.mtDNA_Y55nuc_complete$petfreq_mean
## t = 2.0534, df = 15, p-value = 0.0579
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01576711  0.77465981
## sample estimates:
##       cor 
## 0.4684276
        cor.test.U$p.value
## [1] 0.05789539
        cor.test.U.p<-round(cor.test.U$p.value,3)
        cor.test.U.p
## [1] 0.058
        #not significant

Fig 2C

Strains with original or Y55 mtDNAs

Boxplot of petite frequencies for strains with their original mtDNAs or the mtDNA from Y55

import and prepare data
data.Y55.OG<-read.csv("data.Y55.OG.csv", header = TRUE, stringsAsFactors = FALSE)
str(data.Y55.OG)
## 'data.frame':    71 obs. of  13 variables:
##  $ StrainID    : chr  "ML9x1UB" "ML9x1UB" "ML9x1UB" "ML9x1UB" ...
##  $ Nuclear     : chr  "Y55" "Y55" "Y55" "Y55" ...
##  $ NucPop_liti : chr  "WestAfrican" "WestAfrican" "WestAfrican" "WestAfrican" ...
##  $ Mito        : chr  "Y55" "Y55" "Y55" "Y55" ...
##  $ MitoPop     : chr  "WestAfrican" "WestAfrican" "WestAfrican" "WestAfrican" ...
##  $ Ploidy      : chr  "Haploid" "Haploid" "Haploid" "Haploid" ...
##  $ og_v_syn    : chr  "original" "original" "original" "original" ...
##  $ Genotype    : chr  "MATxura3arg8::URA3" "MATxura3arg8::URA3" "MATxura3arg8::URA3" "MATxura3arg8::URA3" ...
##  $ Petite      : int  35 110 294 35 110 294 213 89 263 183 ...
##  $ Grande      : int  405 774 2864 405 774 2864 1738 1469 2384 1624 ...
##  $ Total       : int  440 884 3158 440 884 3158 1951 1558 2647 1807 ...
##  $ PetFreq     : num  7.95 12.44 9.31 7.95 12.44 ...
##  $ Concatenated: chr  "Y55 Y55" "Y55 Y55" "Y55 Y55" "Y55 Y55" ...
#set factors
data.Y55.OG$StrainID<-as.factor(data.Y55.OG$StrainID)
data.Y55.OG$Nuclear<-as.factor(data.Y55.OG$Nuclear)
data.Y55.OG$NucPop<-as.factor(data.Y55.OG$NucPop)
data.Y55.OG$Mito<-as.factor(data.Y55.OG$Mito)
data.Y55.OG$MitoPop<-as.factor(data.Y55.OG$MitoPop)
data.Y55.OG$og_v_syn<-as.factor(data.Y55.OG$og_v_syn)
data.Y55.OG$Genotype<-as.factor(data.Y55.OG$Genotype)
#set classes
data.Y55.OG$Petite<-as.numeric(data.Y55.OG$Petite)
data.Y55.OG$Grande<-as.numeric(data.Y55.OG$Grande)
data.Y55.OG$Total<-as.numeric(data.Y55.OG$Total)
#subset to compare each nuclear genotype with original vs. Y55 mtDNA
#not elegant but effective
data1<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="Y55")
data2<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="SK1")
data3<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="NCYC110")
data4<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="DBVPG6044")
data5<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="Y12")
data6<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="YPS606")
data7<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="BC187")
data8<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="YJM975")
data9<-subset(data.Y55.OG,data.Y55.OG$Nuclear=="YJM981")

Fig 2C plot

p<-ggplot(data.Y55.OG, aes(x = Nuclear, y = PetFreq, linetype = factor(og_v_syn), fill = factor(NucPop))) + 
  geom_boxplot(aes(fill = NucPop))+
  theme_bw()+ 
  theme(plot.background = element_blank(),panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(),panel.border = element_blank() ) +
  theme(axis.line = element_line(color = 'black'))+
  theme(axis.text.x = element_text(size = 8))+
  scale_fill_manual(name = "NucPop", values = cbbPalette2)+
  scale_alpha_manual(name = "og_v_syn", values = c(1, 0.8)) +
  scale_linetype_manual(name = "og_v_syn", values = c("solid", "dotted"))+
  xlab("Nuclear genotype")+ ylab("Petite frequency (%)")+
  theme(legend.position = "right")+
  guides(fill=guide_legend(title="Population"))+
  theme(legend.position = "right")
p

#### Fig 2C Stats Is there a difference between the petite frequencies of SK1nuc + SK1mt vs. SK1nuc +Y55mt?

FM.cbind.SK1<-glm(cbind(Petite,Grande)~og_v_syn,data=data2, family=binomial)
anova(FM.cbind.SK1, test="Chisq") #P=0.02 *

Is there a difference between the petite frequencies of NCYC110nuc + NCYC110mt vs. NCYC110nuc +Y55mt?

FM.cbind.NCYC110<-glm(cbind(Petite,Grande)~og_v_syn,data=data3, family=binomial)
anova(FM.cbind.NCYC110, test="Chisq") #P<0.001 ***

Is there a difference between the petite frequencies of DBVPG6044nuc + DBVPG6044mt vs. DBVPG6044nuc +Y55mt?

FM.cbind.DBVPG6044<-glm(cbind(Petite,Grande)~og_v_syn,data=data4, family=binomial)
anova(FM.cbind.DBVPG6044, test="Chisq") #P = 0.144 ns

Is there a difference between the petite frequencies of Y12nuc + Y12mt vs. Y12nuc +Y55mt?

FM.cbind.Y12<-glm(cbind(Petite,Grande)~og_v_syn,data=data5, family=binomial)
anova(FM.cbind.Y12, test="Chisq") #P=0.007 **

Is there a difference between the petite frequencies of YPS606nuc + YPS606mt vs. YPS606nuc +Y55mt?

FM.cbind.YPS606<-glm(cbind(Petite,Grande)~og_v_syn,data=data6, family=binomial)
anova(FM.cbind.YPS606, test="Chisq") #P<0.001 ***

Is there a difference between the petite frequencies of BC187nuc + BC187mt vs. BC187nuc +Y55mt?

FM.cbind.BC187<-glm(cbind(Petite,Grande)~og_v_syn,data=data7, family=binomial)
anova(FM.cbind.BC187, test="Chisq") #P=0.20 ns

Is there a difference between the petite frequencies of YJM975nuc + YJM975mt vs. YJM975nuc +Y55mt?

FM.cbind.YJM975<-glm(cbind(Petite,Grande)~og_v_syn,data=data8, family=binomial)
anova(FM.cbind.YJM975, test="Chisq") #P<0.001 ***

Is there a difference between the petite frequencies of YJM981nuc + YJM981mt vs. YJM981nuc +Y55mt?

FM.cbind.YJM981<-glm(cbind(Petite,Grande)~og_v_syn,data=data9, family=binomial)
anova(FM.cbind.YJM981, test="Chisq") #P<0.001 ***

Figure 3

Coevolved mitonuclear interactions stabilize mtDNAs.

Analysis of petite frequencies from a 4 nuc x 4 mtDNA mitonuclear collection (16 genotypes). All nuc and mtDNAs have West African origins.

import data

data.4x4<-read.csv("data.4x4mtnWA.csv", header = TRUE, stringsAsFactors = FALSE)
data.4x4$StrainID<-as.factor(data.4x4$StrainID)
data.4x4$Nuclear<-as.factor(data.4x4$Nuclear)
data.4x4$NucPop<-as.factor(data.4x4$NucPop)
data.4x4$Mito<-as.factor(data.4x4$Mito)
data.4x4$MitoPop<-as.factor(data.4x4$MitoPop)
data.4x4$og_v_syn<-as.factor(data.4x4$og_v_syn)
data.4x4$Genotypes<-as.factor(data.4x4$Genotype)
str(data.4x4)
## 'data.frame':    66 obs. of  13 variables:
##  $ StrainID : Factor w/ 19 levels "21a1","ML11x1UA",..: 1 1 1 1 1 1 3 3 3 15 ...
##  $ Nuclear  : Factor w/ 4 levels "DBVPG6044","NCYC110",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ NucPop   : Factor w/ 1 level "WestAfrican": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Mito     : Factor w/ 4 levels "DBVPG6044","NCYC110",..: 1 1 1 1 1 1 1 1 1 2 ...
##  $ MitoPop  : Factor w/ 1 level "WestAfrican": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Ploidy   : chr  "Haploid" "Haploid" "Haploid" "Haploid" ...
##  $ og_v_syn : Factor w/ 2 levels "original","synthetic": 1 1 1 1 1 1 1 1 1 2 ...
##  $ Genotype : chr  "MATaura3" "MATaura3" "MATaura3" "MATaura3" ...
##  $ Petite   : int  52 49 70 126 104 146 23 18 77 143 ...
##  $ Grande   : int  892 721 1005 2152 1835 2336 620 928 870 2087 ...
##  $ Total    : int  944 770 1075 2278 1939 2482 643 946 947 2230 ...
##  $ PetFreq  : num  5.51 6.36 6.51 5.53 5.36 5.88 3.58 1.9 8.13 6.41 ...
##  $ Genotypes: Factor w/ 3 levels "MATaura3","MATxura3",..: 1 1 1 1 1 1 3 3 3 2 ...

Interaction Plot

cbbPalette <- c("#000000", "#009E73","#D55E00","#0072B2")

interaction.plot(data.4x4$Mito,data.4x4$Nuclear,data.4x4$PetFreq,
                 lwd=2.5,
                 xlab = "mitotype",ylab = "petite frequency (%)",
                 ylim=c(0,35),trace.label="Nuclear genotype", type = "b", col=c(cbbPalette),
                 pch = c(15,17,19,6), fixed =TRUE)

Table S5

ANOVA Petite frequencies influenced by mitonuclear interactions in a 4x4 mitonuclear genotype panel

Analysis of petite frequencies from a 4 nuc x 4 mtDNA mitonuclear collection (16 genotypes). All nuc and mtDNAs have West African origins.

ANOVA Are there significant nuclear, mtDNA, and mitonuclear interactions in the 4x4 WA collection?

FM.cbind.test<-glm(cbind(Petite,Grande)~Nuclear*Mito,data=data.4x4, family=binomial)
anova(FM.cbind.test, test="Chisq")

Fig S1

Strains harboring original mtDNAs have lower petite frequencies than those with synthetic mitonuclear combinations.

Analysis of petite frequencies from a 4 nuc x 4 mtDNA mitonuclear collection (16 genotypes). All nuc and mtDNAs have West African origins. original = strains with their original, coadapted mtDNA synthetic = strains with introduced mtDNAs

cbbPalette3 <- c("original" = "#E69F00","synthetic" ="#999999")

FigS1<-ggplot(data.4x4,aes(x=Nuclear,y=PetFreq,fill=og_v_syn))+geom_boxplot()+
  theme_bw()+ 
  theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) +
  theme(axis.text.x = element_text(size = 10, face="bold")) +scale_fill_manual(values=cbbPalette3)+
  theme(axis.title = element_text(size = 12, face="bold"))+
  xlab("Nuclear Genotype") + ylab("petite frequency (%)")+
  theme(legend.position = c(0.3,0.8),
  legend.title=element_text(size=12,face="bold"), 
  legend.text=element_text(size=10))+
  guides(fill=guide_legend(title="Mitonuclear combination"))
FigS1

#### Fig S1 ##### anova comparisons

to calculate P values for each comparison of strains with original or synthetic mtn combinations

subset data

#subset data
data.DB<-subset(data.4x4,data.4x4$Nuclear=="DBVPG6044")
data.NC<-subset(data.4x4,data.4x4$Nuclear=="NCYC110")
data.Y55<-subset(data.4x4,data.4x4$Nuclear=="Y55")
data.SK1<-subset(data.4x4,data.4x4$Nuclear=="SK1")
FigS1 ANOVA: DBVPG6044 og vs. syn
RM.DB<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.DB, family=binomial)
anova(RM.DB,test="Chisq")
FigS1 ANOVA: NCYC110 og vs. syn
RM.NC<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.NC, family=binomial)
anova(RM.NC,test="Chisq")
FigS1 ANOVA: Y55 og vs. syn
RM.Y55<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.Y55, family=binomial)
anova(RM.Y55,test="Chisq")
FigS1 ANOVA: SK1 og vs. syn
RM.SK1<-glm(cbind(Petite,Grande)~og_v_syn/StrainID,data=data.SK1, family=binomial)
anova(RM.SK1,test="Chisq")

Figure 6

Mitonuclear interactions influencing mtDNA stability include HGH1 and YAT1.

import and prepare data

data.ko<-read.csv("parent_v_ko.csv", header = TRUE, stringsAsFactors = FALSE)

#set factors
data.ko$StrainID<-as.factor(data.ko$StrainID)
data.ko$Nuclear<-as.factor(data.ko$Nuclear)
data.ko$Mito<-as.factor(data.ko$Mito)
data.ko$og_v_syn<-as.factor(data.ko$og_v_syn)

#set classes
data.ko$Petite<-as.numeric(data.ko$Petite)
data.ko$Grande<-as.numeric(data.ko$Grande)
data.ko$Total<-as.numeric(data.ko$Total)

Fig 6A

Mitonuclear interactions influencing mtDNA stability include HGH1 and YAT1

examine petite frequency differences in the parental strain BY4741 vs. knockouts

#first reorder dataset
data.ko$Nuclear<-factor(data.ko$Nuclear,levels=c("parent","est1D","hgh1D","bns1D","smi1D","yat1D"))
 
Fig6A<-ggplot(data.ko, aes(x=Nuclear, y=Petfreq, fill=Nuclear)) + 
    geom_violin(trim=FALSE)+theme_bw() +geom_boxplot(width=0.1)+ scale_fill_manual(values=c("#999999", "#D55E00", "#006633","#66CC66","#00FF00","#3399FF"))+theme(legend.position="none")
Fig6A

##### Fig 6A Anova comparisons ##### compare parental strain vs. each knockout

anova parent vs. est1-delta
     #subset to parent and est1D
      data.e<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="est1D")
      str(data.e)
## 'data.frame':    40 obs. of  12 variables:
##  $ StrainID  : Factor w/ 6 levels "BY4741","YAR035W",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Nuclear   : Factor w/ 6 levels "parent","est1D",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Mito      : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
##  $ og_v_syn  : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Assay     : chr  "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
##  $ Plate     : chr  "5A" "5B" "5C" "5D" ...
##  $ Petite    : num  8 8 6 3 12 10 8 1 4 4 ...
##  $ Grande    : num  54 83 45 71 79 45 57 25 24 53 ...
##  $ Total     : num  62 91 51 74 91 55 65 26 28 57 ...
##  $ Petfreq   : num  12.9 8.79 11.76 4.05 13.19 ...
##  $ Pet.mean  : num  12.9 12.9 12.9 12.9 12.9 ...
##  $ Pet.median: num  12.6 12.6 12.6 12.6 12.6 ...
      wilcox.test(data.e$Petfreq~data.e$Nuclear)   #W=383, p=1.75e-08***
## 
##  Wilcoxon rank sum exact test
## 
## data:  data.e$Petfreq by data.e$Nuclear
## W = 383, p-value = 1.758e-08
## alternative hypothesis: true location shift is not equal to 0
      FM.e<-lm(data.e$Petfreq~data.e$Nuclear)
      summary(FM.e)
## 
## Call:
## lm(formula = data.e$Petfreq ~ data.e$Nuclear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6320  -3.6367  -0.6807   1.8520  20.4012 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       21.694      1.095  19.811  < 2e-16 ***
## data.e$Nuclear1    8.762      1.095   8.001 1.13e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.926 on 38 degrees of freedom
## Multiple R-squared: 0.6275,
## Adjusted R-squared: 0.6177 
## F-statistic: 64.02 on 1 and 38 DF,  p-value: 1.135e-09
      anova(FM.e) #p=1.135e-09***
      FM.e2<-glm(cbind(data.e$Petite, data.e$Grande)~data.e$Nuclear, family = binomial)
      anova(FM.e2, test = "Chisq") #p<2.2e-16 ***
anova parent vs. hgh1-delta
 #subset to parent and hgh1
      data.h<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="hgh1D")
      str(data.h)
## 'data.frame':    37 obs. of  12 variables:
##  $ StrainID  : Factor w/ 6 levels "BY4741","YAR035W",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Nuclear   : Factor w/ 6 levels "parent","est1D",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Mito      : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
##  $ og_v_syn  : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Assay     : chr  "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
##  $ Plate     : chr  "2A" "2B" "2C" "2D" ...
##  $ Petite    : num  30 12 25 15 11 16 5 20 21 13 ...
##  $ Grande    : num  65 67 71 71 44 66 22 47 46 47 ...
##  $ Total     : num  95 79 96 86 55 82 27 67 67 60 ...
##  $ Petfreq   : num  31.6 15.2 26 17.4 20 ...
##  $ Pet.mean  : num  24 24 24 24 24 ...
##  $ Pet.median: num  22.2 22.2 22.2 22.2 22.2 ...
      wilcox.test(data.h$Petfreq~data.h$Nuclear)#W=257 p=0.008369** #warning "cannot compute exact p=value with ties"
## Warning in wilcox.test.default(x = c(26.92307692, 25.26315789, 28.84615385, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data.h$Petfreq by data.h$Nuclear
## W = 257, p-value = 0.008369
## alternative hypothesis: true location shift is not equal to 0
      FM.h<-lm(data.h$Petfreq~data.h$Nuclear)
      summary(FM.h)
## 
## Call:
## lm(formula = data.h$Petfreq ~ data.h$Nuclear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.632  -3.958  -1.609   4.990  17.896 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       27.207      1.094  24.878  < 2e-16 ***
## data.h$Nuclear1    3.249      1.094   2.971  0.00534 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.63 on 35 degrees of freedom
## Multiple R-squared: 0.2014,
## Adjusted R-squared: 0.1786 
## F-statistic: 8.826 on 1 and 35 DF,  p-value: 0.005338
      anova(FM.h) #p=0.005338**
      FM.h2<-glm(cbind(data.h$Petite, data.h$Grande)~data.h$Nuclear, family = binomial)
      anova(FM.h2, test = "Chisq") #p<0.0001307 ***
anova parent vs. bns1-delta
 #subset to parent and bns1
      data.b<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="bns1D")
      str(data.b)
## 'data.frame':    40 obs. of  12 variables:
##  $ StrainID  : Factor w/ 6 levels "BY4741","YAR035W",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ Nuclear   : Factor w/ 6 levels "parent","est1D",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Mito      : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
##  $ og_v_syn  : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Assay     : chr  "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
##  $ Plate     : chr  "1A" "1B" "1C" "1D" ...
##  $ Petite    : num  19 23 37 32 18 16 15 15 22 7 ...
##  $ Grande    : num  31 43 67 59 64 50 49 38 49 27 ...
##  $ Total     : num  50 66 104 91 82 66 64 53 71 34 ...
##  $ Petfreq   : num  38 34.8 35.6 35.2 22 ...
##  $ Pet.mean  : num  29.2 29.2 29.2 29.2 29.2 ...
##  $ Pet.median: num  28.4 28.4 28.4 28.4 28.4 ...
      wilcox.test(data.b$Petfreq~data.b$Nuclear) #W=217, p=0.6588
## 
##  Wilcoxon rank sum exact test
## 
## data:  data.b$Petfreq by data.b$Nuclear
## W = 217, p-value = 0.6588
## alternative hypothesis: true location shift is not equal to 0
      FM.b<-lm(data.b$Petfreq~data.b$Nuclear)
      summary(FM.b)
## 
## Call:
## lm(formula = data.b$Petfreq ~ data.b$Nuclear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.632  -4.655  -1.022   5.433  17.896 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      29.8067     1.0669  27.937   <2e-16 ***
## data.b$Nuclear1   0.6489     1.0669   0.608    0.547    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.748 on 38 degrees of freedom
## Multiple R-squared: 0.009639,
## Adjusted R-squared: -0.01642 
## F-statistic: 0.3699 on 1 and 38 DF,  p-value: 0.5467
      anova(FM.b) #p=0.547 ns
      FM.b2<-glm(cbind(data.b$Petite, data.b$Grande)~data.b$Nuclear, family = binomial)
      anova(FM.b2, test = "Chisq") #p<0.0.7255 ns
anova parent vs. smi1-delta
 #subset to parent and smi1
      data.s<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="smi1D")
      str(data.s)
## 'data.frame':    40 obs. of  12 variables:
##  $ StrainID  : Factor w/ 6 levels "BY4741","YAR035W",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Nuclear   : Factor w/ 6 levels "parent","est1D",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Mito      : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
##  $ og_v_syn  : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Assay     : chr  "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
##  $ Plate     : chr  "6A" "6B" "6C" "6D" ...
##  $ Petite    : num  21 24 45 33 24 31 31 12 25 43 ...
##  $ Grande    : num  57 71 111 70 58 53 49 45 100 101 ...
##  $ Total     : num  78 95 156 103 82 84 80 57 125 144 ...
##  $ Petfreq   : num  26.9 25.3 28.8 32 29.3 ...
##  $ Pet.mean  : num  30.5 30.5 30.5 30.5 30.5 ...
##  $ Pet.median: num  29.1 29.1 29.1 29.1 29.1 ...
      wilcox.test(data.s$Petfreq~data.s$Nuclear) #W=208, p=0.8392  #warning "cannot compute exact p=value with ties"
## Warning in wilcox.test.default(x = c(26.92307692, 25.26315789, 28.84615385, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data.s$Petfreq by data.s$Nuclear
## W = 208, p-value = 0.8392
## alternative hypothesis: true location shift is not equal to 0
      FM.s<-lm(data.s$Petfreq~data.s$Nuclear)
      summary(FM.s)
## 
## Call:
## lm(formula = data.s$Petfreq ~ data.s$Nuclear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.247  -5.252  -1.699   5.571  18.143 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      30.4421     1.2980   23.45   <2e-16 ***
## data.s$Nuclear1   0.0134     1.2980    0.01    0.992    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 8.209 on 38 degrees of freedom
## Multiple R-squared: 2.805e-06,
## Adjusted R-squared: -0.02631 
## F-statistic: 0.0001066 on 1 and 38 DF,  p-value: 0.9918
      anova(FM.s) #p=0.9918 ns
      FM.s2<-glm(cbind(data.s$Petite, data.s$Grande)~data.s$Nuclear, family = binomial)
      anova(FM.s2, test = "Chisq") #p<0.0.7886 ns
anova parent vs. yat1-delta
#subset to parent and yat1
      data.y<-subset(data.ko,data.ko$Nuclear=="parent"|data.ko$Nuclear=="yat1D")
      str(data.y)
## 'data.frame':    40 obs. of  12 variables:
##  $ StrainID  : Factor w/ 6 levels "BY4741","YAR035W",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Nuclear   : Factor w/ 6 levels "parent","est1D",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Mito      : Factor w/ 1 level "BY4741": 1 1 1 1 1 1 1 1 1 1 ...
##  $ og_v_syn  : Factor w/ 1 level "original": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Assay     : chr  "fluccuation" "fluccuation" "fluccuation" "fluccuation" ...
##  $ Plate     : chr  "6A" "6B" "6C" "6D" ...
##  $ Petite    : num  21 24 45 33 24 31 31 12 25 43 ...
##  $ Grande    : num  57 71 111 70 58 53 49 45 100 101 ...
##  $ Total     : num  78 95 156 103 82 84 80 57 125 144 ...
##  $ Petfreq   : num  26.9 25.3 28.8 32 29.3 ...
##  $ Pet.mean  : num  30.5 30.5 30.5 30.5 30.5 ...
##  $ Pet.median: num  29.1 29.1 29.1 29.1 29.1 ...
      wilcox.test(data.y$Petfreq~data.y$Nuclear) #W=172   p=0.4569   #warning "cannot compute exact p=value with ties"
## Warning in wilcox.test.default(x = c(26.92307692, 25.26315789, 28.84615385, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data.y$Petfreq by data.y$Nuclear
## W = 172, p-value = 0.4569
## alternative hypothesis: true location shift is not equal to 0
      FM.y<-lm(data.y$Petfreq~data.y$Nuclear)
      summary(FM.y)
## 
## Call:
## lm(formula = data.y$Petfreq ~ data.y$Nuclear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.791  -3.867  -1.398   4.764  17.896 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      31.3189     1.2608  24.841   <2e-16 ***
## data.y$Nuclear1  -0.8634     1.2608  -0.685    0.498    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 7.974 on 38 degrees of freedom
## Multiple R-squared: 0.01219,
## Adjusted R-squared: -0.0138 
## F-statistic: 0.469 on 1 and 38 DF,  p-value: 0.4976
      anova(FM.y) #p=0.4978
      FM.y2<-glm(cbind(data.y$Petite, data.y$Grande)~data.y$Nuclear, family = binomial)
      anova(FM.y2, test = "Chisq") #p<0.0.3097 ns

Fig 6B

Petite frequencies of hgh1-delta and yat1-delta strains depend on mitotype.

import data

data.ko.mtn<-read.csv("data.ko.phy.mtn2.csv", header = TRUE, stringsAsFactors = FALSE)

data.ko.mtn$Mito<-as.factor(data.ko.mtn$Mito)
     data.ko.mtn$Nuclear<-as.factor(data.ko.mtn$Nuclear)
      data.ko.mtn$StrainID<-as.factor(data.ko.mtn$StrainID)
      data.ko.mtn$og_v_syn<-as.factor(data.ko.mtn$og_v_syn)
      data.ko.mtn$Assay<-as.factor(data.ko.mtn$Assay)
      data.ko.mtn$Plate<-as.factor(data.ko.mtn$Plate)

interaction plot

interaction.plot(data.ko.mtn$Mito,data.ko.mtn$Nuclear,data.ko.mtn$Pet.mean,
                 xlab = "Mito",ylab = "Petmean",
                 ylim=c(0,45),trace.label="nuclear", type = "b", col=c("#D55E00","green","black","blue"),lty=c(1,1,2,1),lwd=2,
                 pch = c(18,16,17,15),fixed =TRUE)

##### Fig 6B and Table S10 Statistics ##### Table S10 anovas ###### Table S10 anova on full dataset

      FM.ko.mtn.cbind<-glm(cbind(data.ko.mtn$Petite, data.ko.mtn$Grande)~data.ko.mtn$Nuclear*data.ko.mtn$Mito, family = binomial)
      anova(FM.ko.mtn.cbind, test = "Chisq")
anova: pairwise comparisons
subset data for each pairwise test
data.ph<-subset(data.ko.mtn,data.ko.mtn$Nuclear=="parent"|data.ko.mtn$Nuclear=="hgh1D")
data.phPN<-subset(data.ph,data.ph$Mito=="BY4741"|data.ph$Mito=="NCYC110")
data.phPY<-subset(data.ph,data.ph$Mito=="BY4741"|data.ph$Mito=="YPS606")

data.py<-subset(data.ko.mtn,data.ko.mtn$Nuclear=="parent"|data.ko.mtn$Nuclear=="yat1D")
data.pyPN<-subset(data.py,data.py$Mito=="BY4741"|data.py$Mito=="NCYC110")
data.pyPY<-subset(data.py,data.py$Mito=="BY4741"|data.py$Mito=="YPS606")

data.pe<-subset(data.ko.mtn,data.ko.mtn$Nuclear=="parent"|data.ko.mtn$Nuclear=="est1D")
data.pePN<-subset(data.pe,data.pe$Mito=="BY4741"|data.pe$Mito=="NCYC110")
data.pePY<-subset(data.pe,data.pe$Mito=="BY4741"|data.pe$Mito=="YPS606")
Table S10
Parental vs. hgh1∆
mtDNA comparison: BY4741 vs NCYC110
FM.phPN.cbind<-glm(cbind(data.phPN$Petite, data.phPN$Grande)~data.phPN$Nuclear*data.phPN$Mito, family = binomial)
      anova(FM.phPN.cbind, test = "Chisq") 
Table S10
Parental vs. hgh1∆
mtDNA comparison: BY4741 vs YPS606
FM.phPY.cbind<-glm(cbind(data.phPY$Petite, data.phPY$Grande)~data.phPY$Nuclear*data.phPY$Mito, family = binomial)
      anova(FM.phPY.cbind, test = "Chisq") 
Table S10
Parental vs. yat1∆
mtDNA comparison: BY4741 vs NCYC110
FM.pyPN.cbind<-glm(cbind(data.pyPN$Petite, data.pyPN$Grande)~data.pyPN$Nuclear*data.pyPN$Mito, family = binomial)
      anova(FM.pyPN.cbind, test = "Chisq") 
Table S10
Parental vs. yat1∆
mtDNA comparison: BY4741 vs YPS606
FM.pyPY.cbind<-glm(cbind(data.pyPY$Petite, data.pyPY$Grande)~data.pyPY$Nuclear*data.pyPY$Mito, family = binomial)
      anova(FM.pyPY.cbind, test = "Chisq") 

###updated Table S10 ##parental vs. est1∆ ###### mtDNA comparison: BY4741 vs NCYC110

FM.pePN.cbind<-glm(cbind(data.pePN$Petite, data.pePN$Grande)~data.pePN$Nuclear*data.pePN$Mito, family = binomial)
anova(FM.pePN.cbind, test = "Chisq") 
Table S10
Parental vs. est1∆
mtDNA comparison: BY4741 vs YPS606
FM.pePY.cbind<-glm(cbind(data.pePY$Petite, data.pePY$Grande)~data.pePY$Nuclear*data.pePY$Mito, family = binomial)
anova(FM.pePY.cbind, test = "Chisq") 

Fig S4

Mitonuclear effects of bns1-delta, smi1-delta and hgh1-delta on Petite frequencies

#import data
data.ko.mtn.bhg<-read.csv("data.ko.mtn.rollerassays_bns1smi1hgh1.csv", header = TRUE, stringsAsFactors = FALSE)
data.ko.mtn.bhg$Mitotype<-as.factor(data.ko.mtn.bhg$Mitotype)
data.ko.mtn.bhg$GeneKO<-as.factor(data.ko.mtn.bhg$GeneKO)
#set classes
data.ko.mtn.bhg$Petite<-as.integer(data.ko.mtn.bhg$Petite)
data.ko.mtn.bhg$Grande<-as.integer(data.ko.mtn.bhg$Grande)
data.ko.mtn.bhg$Total<-as.integer(data.ko.mtn.bhg$Total)

interaction plot

interaction.plot(data.ko.mtn.bhg$Mitotype ,data.ko.mtn.bhg$GeneKO ,data.ko.mtn.bhg$Pet.Mean,
                 xlab = "Mitotype",ylab = "Mean Petite Frequency",
                 main = "Fig S5",
                 ylim=c(0,30),trace.label="Nuclear", type = "b", col=c("green","black","blue","orange"),
                 pch = c(15,17), fixed =TRUE)

Table S11 stats
Table S10 anova on full dataset
      FM.ko.mtn.bhg.cbind<-glm(cbind(data.ko.mtn.bhg$Petite, data.ko.mtn.bhg$Grande)~data.ko.mtn.bhg$GeneKO*data.ko.mtn.bhg$Mitotype, family = binomial)
      anova(FM.ko.mtn.bhg.cbind, test = "Chisq")

Expression Data

import data

data.rtpcr<-read.table("Relative Expression.txt")

data.rtpcr$Can_Hap<-as.factor(data.rtpcr$Can_Hap)
data.rtpcr$Mitotype<-as.factor(data.rtpcr$Mitotype)
data.rtpcr$MIP1_Hap<-as.factor(data.rtpcr$MIP1_Hap)

###Fig S6 ### Does expression correlate with petite frequency? ##### Plot

require(ggplot2)
library(ggplot2)
# Gene Expression compared to petite frequency. MPI1 = ALL
BNS1.pf <- ggplot(data.rtpcr, aes(x = BNS1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("BNS1") + xlab("Expression") + ylab("Petite Frequency (%)") 
HGH1.pf <- ggplot(data.rtpcr, aes(x = HGH1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("HGH1") + xlab("Expression") + ylab("Petite Frequency (%)")
SMI1.pf <- ggplot(data.rtpcr, aes(x = SMI1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("SMI1") + xlab("Expression") + ylab("Petite Frequency (%)")
YAT1.pf <- ggplot(data.rtpcr, aes(x = YAT1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("YAT1") + xlab("Expression") + ylab("Petite Frequency (%)")
MIP1.pf <- ggplot(data.rtpcr, aes(x = MIP1_res, y = Pet_Freq)) + geom_point() + geom_smooth(method = lm) + ggtitle("MIP1") + xlab("Expression") + ylab("Petite Frequency (%)")

# combine plots to a single grid 
require("gridExtra")
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.pf <- grid.arrange(BNS1.pf, HGH1.pf, SMI1.pf, YAT1.pf, MIP1.pf, nrow = 2, ncol = 3)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

###
stats for Fig S6
# Does expression of MIP1 correlate with petite frequency?? 

# linear model of correlation between expression and petite frequency in MIP1
lm.mip.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$MIP1_res)
anova.mip.pfr <- anova(lm.mip.pfr)
anova.mip.pfr
# p = 0.1229927, no correlation 
summary(lm.mip.pfr) #R2=0.1422,
## 
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$MIP1_res)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.104 -4.217 -1.482  2.172 14.439 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.276      1.452   4.323 0.000525 ***
## data.rtpcr$MIP1_res 1885.434   1157.926   1.628 0.122993    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.159 on 16 degrees of freedom
## Multiple R-squared: 0.1422,
## Adjusted R-squared: 0.08854 
## F-statistic: 2.651 on 1 and 16 DF,  p-value: 0.123
cor.test.mip.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$MIP1_res)
cor.test.mip.pfr #cor=r=0.0.3770298
## 
##  Pearson's product-moment correlation
## 
## data:  data.rtpcr$Pet_Freq and data.rtpcr$MIP1_res
## t = 1.6283, df = 16, p-value = 0.123
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1090326  0.7175873
## sample estimates:
##       cor 
## 0.3770298
cor.test.mip.pfr$p.value #p=0.1229927
## [1] 0.1229927
# Does expression of BNS1 correlate with petite frequency?
lm.bns.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$BNS1_res)
anova.bns.pfr <- anova(lm.bns.pfr)
anova.bns.pfr
# p = 0.4434, no correlation 
summary(lm.bns.pfr) #R2=0.03717
## 
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$BNS1_res)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.960 -4.257 -2.573  4.258 16.373 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.276      1.538   4.081 0.000871 ***
## data.rtpcr$BNS1_res 1050.590   1336.746   0.786 0.443392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.525 on 16 degrees of freedom
## Multiple R-squared: 0.03717,
## Adjusted R-squared: -0.02301 
## F-statistic: 0.6177 on 1 and 16 DF,  p-value: 0.4434
cor.test.bns.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$BNS1_res)
cor.test.bns.pfr #cor=r=0.1927965
## 
##  Pearson's product-moment correlation
## 
## data:  data.rtpcr$Pet_Freq and data.rtpcr$BNS1_res
## t = 0.78593, df = 16, p-value = 0.4434
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3011834  0.6051926
## sample estimates:
##       cor 
## 0.1927965
cor.test.bns.pfr$p.value #p=0.4433923
## [1] 0.4433923
#Does expression of HGH1 correlate with petite frequency?
lm.hgh.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$HGH1_res)
anova.hgh.pfr <- anova(lm.hgh.pfr)
anova.hgh.pfr
# p = 0.07751, no correlation :/
summary(lm.hgh.pfr) #R2=0.182,
## 
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$HGH1_res)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.540 -4.309 -1.267  2.474 13.925 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.276      1.418   4.427 0.000423 ***
## data.rtpcr$HGH1_res 1773.321    940.002   1.887 0.077511 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.014 on 16 degrees of freedom
## Multiple R-squared: 0.182,
## Adjusted R-squared: 0.1308 
## F-statistic: 3.559 on 1 and 16 DF,  p-value: 0.07751
cor.test.hgh.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$HGH1_res)
cor.test.hgh.pfr #cor=r=0.4265
## 
##  Pearson's product-moment correlation
## 
## data:  data.rtpcr$Pet_Freq and data.rtpcr$HGH1_res
## t = 1.8865, df = 16, p-value = 0.07751
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05032692  0.74505714
## sample estimates:
##       cor 
## 0.4265658
cor.test.hgh.pfr$p.value #p=0.0775
## [1] 0.07751052
#Does expression of SMI1 correlate with petite frequency?
lm.smi.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$SMI1_res)
anova.smi.pfr <- anova(lm.smi.pfr)
anova.smi.pfr
# p = 0.2322, no correlation 
summary(lm.smi.pfr) #R2=0.08791
## 
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$SMI1_res)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.147 -3.876 -2.149  3.996 14.976 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.276      1.497   4.193 0.000689 ***
## data.rtpcr$SMI1_res 3561.285   2867.811   1.242 0.232202    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.351 on 16 degrees of freedom
## Multiple R-squared: 0.08791,
## Adjusted R-squared: 0.0309 
## F-statistic: 1.542 on 1 and 16 DF,  p-value: 0.2322
cor.test.smi.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$SMI1_res)
cor.test.smi.pfr #cor=r=0.2964936
## 
##  Pearson's product-moment correlation
## 
## data:  data.rtpcr$Pet_Freq and data.rtpcr$SMI1_res
## t = 1.2418, df = 16, p-value = 0.2322
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1977498  0.6705443
## sample estimates:
##       cor 
## 0.2964936
cor.test.smi.pfr$p.value #p=0.2322017
## [1] 0.2322017
#Does expression of YAT1 correlate with petite frequency?
lm.yat.pfr <- lm(data.rtpcr$Pet_Freq ~ data.rtpcr$YAT1_res)
anova.yat.pfr <- anova(lm.yat.pfr)
anova.yat.pfr
# p = 0.4723, no correlation
summary(lm.yat.pfr) #R2=0.03276
## 
## Call:
## lm(formula = data.rtpcr$Pet_Freq ~ data.rtpcr$YAT1_res)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.696 -4.470 -3.240  4.654 18.053 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6.276      1.541   4.072 0.000888 ***
## data.rtpcr$YAT1_res  449.326    610.341   0.736 0.472272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## s: 6.54 on 16 degrees of freedom
## Multiple R-squared: 0.03276,
## Adjusted R-squared: -0.02769 
## F-statistic: 0.542 on 1 and 16 DF,  p-value: 0.4723
cor.test.yat.pfr<-cor.test(data.rtpcr$Pet_Freq,data.rtpcr$YAT1_res)
cor.test.yat.pfr #cor=r=0.0.181
## 
##  Pearson's product-moment correlation
## 
## data:  data.rtpcr$Pet_Freq and data.rtpcr$YAT1_res
## t = 0.73619, df = 16, p-value = 0.4723
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3122503  0.5973934
## sample estimates:
##       cor 
## 0.1810071
cor.test.yat.pfr$p.value #p=0.47227
## [1] 0.4722721

Fig S7

Expression based on mitonuclear candidate haplotypes or MIP1 haplotype

plot
#Fig S7
require(ggplot2)
library(ggplot2)

# Gene Expression in Candidate Haplotypes (DDDA, GAID)
BNS1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = BNS1_res)) + geom_boxplot() + ggtitle("BNS1") + xlab("MtN Hap") + ylab(NULL)
HGH1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = HGH1_res)) + geom_boxplot() + ggtitle("HGH1") + xlab("MtN Hap") + ylab("Normalized Expression (residuals)")
SMI1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = SMI1_res)) + geom_boxplot() + ggtitle("SMI1") + xlab("MtN Hap") + ylab(NULL)
YAT1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = YAT1_res)) + geom_boxplot() + ggtitle("YAT1") + xlab("MtN Hap") + ylab(NULL)
MIP1.Can <- ggplot(data.rtpcr, aes(x = Can_Hap, y = MIP1_res)) + geom_boxplot() + ggtitle("MIP1") + xlab("MtN Hap") + ylab(NULL)


# Gene Expression in MIP1 Haplotypes (GGC, TAT)
BNS1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = BNS1_res)) + geom_boxplot() + ggtitle("BNS1") + xlab("MIP1 Hap") + ylab(NULL)
HGH1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = HGH1_res)) + geom_boxplot() + ggtitle("HGH1") + xlab("MIP1 Hap") + ylab("Normalized Expression (residuals)")
SMI1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = SMI1_res)) + geom_boxplot() + ggtitle("SMI1") + xlab("MIP1 Hap") + ylab(NULL)
YAT1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = YAT1_res)) + geom_boxplot() + ggtitle("YAT1") + xlab("MIP1 Hap") + ylab(NULL)
MIP1.MIP1_Hap <- ggplot(data.rtpcr, aes(x = MIP1_Hap, y = MIP1_res)) + geom_boxplot() + ggtitle("MIP1") + xlab("MIP1 Hap") + ylab(NULL)

require("gridExtra")
grid.CAN<-grid.arrange(HGH1.Can, BNS1.Can, SMI1.Can, YAT1.Can, MIP1.Can, nrow = 1, ncol = 5)

grid.MIP1.hap <- grid.arrange(HGH1.MIP1_Hap,BNS1.MIP1_Hap,SMI1.MIP1_Hap,YAT1.MIP1_Hap, MIP1.MIP1_Hap, nrow = 1, ncol = 5)

plotcombined<-ggarrange(grid.CAN,grid.MIP1.hap, 
                         labels = c("A", "B"),
                         ncol = 1, nrow = 2, align = "h")
plotcombined

Table S12

P values from these analyses shown on Fig S7
# Is expression of MIP1 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction? 
lm.exp.mip <- lm(data.rtpcr$MIP1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.mip <- anova(lm.exp.mip)
anova.exp.mip
# Is expression of BNS11 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction? 

lm.exp.bns <- lm(data.rtpcr$BNS1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.bns <- anova(lm.exp.bns)
anova.exp.bns
# Is expression of HGH11 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction? 

lm.exp.hgh <- lm(data.rtpcr$HGH1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.hgh <- anova(lm.exp.hgh)
anova.exp.hgh
# Is expression of SMI1 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction? 

lm.exp.smi <- lm(data.rtpcr$SMI1_res ~ (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.smi <- anova(lm.exp.smi)
anova.exp.smi
#  Is expression of YAT1 influenced by it's haplotype, the haplotype of the mitonuclear candidates, mitotype or any interaction? 

lm.exp.yat <- lm(data.rtpcr$YAT1_res ~  (data.rtpcr$Can_Hap * data.rtpcr$Mitotype * data.rtpcr$MIP1_Hap))
anova.exp.yat <- anova(lm.exp.yat)
anova.exp.yat

###Figure 7A

#correlations between RC1 and RC2 growth data and petite frequencies
#import data
data.RC12<-read.csv("Pet_growth_RC12.csv")
head(data.RC12)
#does pet frequency correlate with growth data at 30 across the entire RC1 and RC2 collections?
      plot(data.RC12$Petitefrequency, data.RC12$SD_Pheno_30)

      cor.test(data.RC12$Petitefrequency, data.RC12$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC12$Petitefrequency and data.RC12$SD_Pheno_30
## t = 3.0265, df = 333, p-value = 0.002667
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.05746919 0.26610434
## sample estimates:
##       cor 
## 0.1636157
      cols<-c("X273614N"= "grey40","YPS606" ="#009E73")
      
      plotRC12 = ggplot(data.RC12, aes(x=SD_Pheno_30,y= Petitefrequency))+
        geom_point(aes(color=Mito),size = 2)+scale_color_manual(values=cols)+
        labs(x = "Vmax_30C", y = "Petite Frequency (%)")+
        theme_bw()+
        theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
        theme(axis.line = element_line(color = 'black'))+
        theme(axis.text.x = element_text(size = 10))+
        theme(legend.position = "none")+
        geom_smooth(method = "lm", se = FALSE, color="black")
#Fig7A
      plotRC12
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

      #is this significance driven by the one datapoint where petfreq>200?
      data_subset<-subset(data.RC12,data.RC12$Petitefrequency<20)
      cor.test(data_subset$Petitefrequency, data_subset$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data_subset$Petitefrequency and data_subset$SD_Pheno_30
## t = 2.6073, df = 332, p-value = 0.009538
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03486742 0.24523851
## sample estimates:
##       cor 
## 0.1416519
      #is the vmax/petite correlation observed in RC1, RC2 alone?
      data.RC1<-subset(data.RC12,data.RC12$Mito == "X273614N")
      data.RC2<-subset(data.RC12,data.RC12$Mito == "YPS606")
      plot(data.RC1$Petitefrequency, data.RC1$SD_Pheno_30)

      plot(data.RC2$Petitefrequency, data.RC2$SD_Pheno_30)

      cor.test(data.RC1$Petitefrequency, data.RC1$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC1$Petitefrequency and data.RC1$SD_Pheno_30
## t = 0.85311, df = 176, p-value = 0.3948
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08370201  0.20928205
## sample estimates:
##        cor 
## 0.06417273
      #corr = 0.064 p = 0.39
      cor.test(data.RC2$Petitefrequency, data.RC2$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC2$Petitefrequency and data.RC2$SD_Pheno_30
## t = 3.1071, df = 155, p-value = 0.002248
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08887121 0.38420459
## sample estimates:
##       cor 
## 0.2421386
      #corr = 0.2 p = 0.002

#Fig7B

#The RC contains MIP1 (and other) alleles with strong independent effects on petite frequency. #maybe mitonuclear influenced pet freq correlations with growth will be more evident if the RC is subset into strains that have the high v low MIP1 alleles. #The file Nsnps contains the genotypes for each RC strain at the snps with significant nuclear associations.

#import file and rename first column
      Nsnps<-read.csv("N_snps.csv")
      names(Nsnps)[1]<-"Strain"
      
      #subset Nsnps to Chr15 alleles and add to dataframe
      df.MIP1snps<-Nsnps[,c("Strain","chr15.943237")]
      data.mip1<-merge(data.RC12,df.MIP1snps, by="Strain")
      #subset to MIP1 T and MIP1C
      data.mip1T<-subset(data.mip1, chr15.943237=="T")
      data.mip1C<-subset(data.mip1,chr15.943237=="C")
      #subset to RC1 and RC2
      data.RC1.mip1T<-subset(data.mip1T, Mito=="X273614N")
      data.RC2.mip1T<-subset(data.mip1T,Mito=="YPS606")
      
      data.RC1.mip1C<-subset(data.mip1C,Mito=="X273614N")
      data.RC2.mip1C<-subset(data.mip1C,Mito=="YPS606")
      
      #correlations
      #mip1T
      cor.test(data.mip1T$Petitefrequency, data.mip1T$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.mip1T$Petitefrequency and data.mip1T$SD_Pheno_30
## t = 1.2604, df = 175, p-value = 0.2092
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05339938  0.23900518
## sample estimates:
##       cor 
## 0.0948483
      #not correlated
      
      #mip1C
      cor.test(data.mip1C$Petitefrequency, data.mip1C$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.mip1C$Petitefrequency and data.mip1C$SD_Pheno_30
## t = 2.6583, df = 156, p-value = 0.008672
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.05377874 0.35284259
## sample estimates:
##      cor 
## 0.208171
      #cor = 0.2 p = 0.008672
      #RC1 MIP1T
      cor.test(data.RC1.mip1T$Petitefrequency, data.RC1.mip1T$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC1.mip1T$Petitefrequency and data.RC1.mip1T$SD_Pheno_30
## t = 0.26131, df = 92, p-value = 0.7944
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1763563  0.2285894
## sample estimates:
##        cor 
## 0.02723385
      #not correlated
      
      #RC1 MIP1C
      cor.test(data.RC1.mip1C$Petitefrequency, data.RC1.mip1C$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC1.mip1C$Petitefrequency and data.RC1.mip1C$SD_Pheno_30
## t = 0.22439, df = 82, p-value = 0.823
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1906351  0.2379042
## sample estimates:
##        cor 
## 0.02477252
      #not correlated
      
      #RC2 MIP1T
      cor.test(data.RC2.mip1T$Petitefrequency, data.RC2.mip1T$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC2.mip1T$Petitefrequency and data.RC2.mip1T$SD_Pheno_30
## t = 1.1257, df = 81, p-value = 0.2636
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09410133  0.33093751
## sample estimates:
##       cor 
## 0.1241069
      #not correlated
      
      #RC2 MIP1C
      cor.test(data.RC2.mip1C$Petitefrequency, data.RC2.mip1C$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC2.mip1C$Petitefrequency and data.RC2.mip1C$SD_Pheno_30
## t = 3.4043, df = 72, p-value = 0.001087
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1572312 0.5537380
## sample estimates:
##       cor 
## 0.3723524
      #corr = 0.37 p = 0.001
     
      #plot
      
      plot.RC2.MIP1C = ggplot(data.RC2.mip1C, aes(x=SD_Pheno_30,y= Petitefrequency))+
        geom_point(aes(color=Mito),size = 2)+scale_color_manual(values=cols)+
        labs(x = "Vmax_30C", y = "Petite Frequency (%)")+
        theme_bw()+
        theme(plot.background = element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.border = element_blank() ) +
        theme(axis.line = element_line(color = 'black'))+
        theme(axis.text.x = element_text(size = 10))+
        theme(legend.position = "none")+
        geom_smooth(method = "lm", se = FALSE, color="black")
#Fig7B
      plot.RC2.MIP1C
## `geom_smooth()` using formula 'y ~ x'

          #is this still significant if the outlier is removed?
          
          data.RC2.MIP1C_subset<-subset(data.RC2.mip1C,data.RC2.mip1C$Petitefrequency<20)
          cor.test(data.RC2.MIP1C_subset$Petitefrequency, data.RC2.MIP1C_subset$SD_Pheno_30)
## 
##  Pearson's product-moment correlation
## 
## data:  data.RC2.MIP1C_subset$Petitefrequency and data.RC2.MIP1C_subset$SD_Pheno_30
## t = 3.4043, df = 72, p-value = 0.001087
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1572312 0.5537380
## sample estimates:
##       cor 
## 0.3723524
            #yes, it is R=0.37, p=0.001

#Fig 7C and 7D #emvironmental conditions that alter growth rates also alter petite frequencies #growth rates and petite frequencies were measured for 6 strains in low sugar at 30C, 30C, and 37C

###for fig 7CD
      
      #growth
      dfgrowth_full = read.csv("Vmax5_30_37_LS.csv", na.strings="NA",header=T)
     # dfgrowth.summary<-ddply(dfgrowth_full,c("Nuclear", "Condition"), summarize, N1 = length(VmaxRaw), Vmaxaverage = mean(VmaxRaw), sd1 = sd(VmaxRaw), se = sd1 / sqrt (N1))
      dfgrowth_subset<-subset(dfgrowth_full,Nuclear %in% c("UWOPS05-217.3","L-1528","BC187","YPS128","YPS606","Y12"))
      dfgrowth_LSHS<-subset(dfgrowth_subset,Condition %in% c("LS","CSM30"))
      dfgrowth_3037<-subset(dfgrowth_subset,Condition %in% c("CSM30","CSM37"))
      
      #reorganize isolates for a certain order
      dfgrowth_subset$Nuclear <- factor(dfgrowth_subset$Nuclear, levels=c("Y12","UWOPS05-217.3","YPS606","YPS128","L-1528","BC187"))
      dfgrowth_subset$Condition <- factor(dfgrowth_subset$Condition, levels=c("LS","CSM30","CSM37"))
      
      dfgrowth_LSHS$Nuclear<-factor(dfgrowth_LSHS$Nuclear, levels=c("Y12","UWOPS05-217.3","YPS606","YPS128","L-1528","BC187"))
      dfgrowth_LSHS$Condition<-factor(dfgrowth_LSHS$Condition, levels=c("LS","CSM30"))
      
      dfgrowth_3037$Nuclear<-factor(dfgrowth_3037$Nuclear, levels=c("Y12","UWOPS05-217.3","YPS606","YPS128","L-1528","BC187"))
      dfgrowth_3037$Condition<-factor(dfgrowth_3037$Condition, levels=c("CSM30","CSM37"))
      
      #plots
      plot.growth<-ggplot(dfgrowth_subset,aes(x=Nuclear,y=VmaxRaw,fill=Condition))+geom_boxplot()
      plot.growth

      plot.growth.LSHS<-ggplot(dfgrowth_LSHS,aes(x=Nuclear, y=VmaxRaw,fill=Condition))+geom_boxplot()
      plot.growth.LSHS

      plot.growth.3037<-ggplot(dfgrowth_3037,aes(x=Nuclear, y=VmaxRaw,fill=Condition))+geom_boxplot()
      plot.growth.3037

      #petite
      dfpet = read.csv("petite_3037LS.csv",na.strings="NA",header=T)
      dfpet_LSHS<-subset(dfpet,Condition %in% c("LS30","CSM30"))
      dfpet_3037<-subset(dfpet,Condition %in% c("CSM30","CSM37"))                
      #reorganize isolates for a certain order                  
      dfpet$Strain <- factor(dfpet$Strain, levels=c("Y12","UWOPS052173","YPS606","YPS128","L1528","BC187"))
      dfpet$Condition <- factor(dfpet$Condition, levels=c("LS30","CSM30","CSM37"))
      
      dfpet_LSHS$Strain <- factor(dfpet_LSHS$Strain, levels=c("Y12","UWOPS052173","YPS606","YPS128","L1528","BC187"))
      dfpet_LSHS$Condition <- factor(dfpet_LSHS$Condition, levels=c("LS30","CSM30","CSM37"))
      
      dfpet_3037$Strain <- factor(dfpet_3037$Strain, levels=c("Y12","UWOPS052173","YPS606","YPS128","L1528","BC187"))
      dfpet_3037$Condition <- factor(dfpet_3037$Condition, levels=c("LS30","CSM30","CSM37"))
      
      #quick plots
      plot.pet<-ggplot(dfpet,aes(x=Strain,y=Petitefrequency,fill=Condition))+geom_boxplot()
      plot.pet

      plot.pet.LSHS<-ggplot(dfpet_LSHS,aes(x=Strain,y=Petitefrequency,fill=Condition))+geom_boxplot()
      plot.pet.LSHS

      plot.pet.3037<-ggplot(dfpet_3037,aes(x=Strain,y=Petitefrequency,fill=Condition))+geom_boxplot()
      plot.pet.3037

      #shaded plots
      #growth... LS CSM30 CSM37
      rect_df<-data.frame(Nuclear=unique(dfgrowth_subset$Nuclear))
      rect_df$VmaxRaw<-0
      head(rect_df)
      plot.growth<-ggplot(dfgrowth_subset,aes(x=Nuclear, y=VmaxRaw,fill=Condition))+geom_boxplot()
      plot.growth

      plot.vmax.shaded <- ggplot(dfgrowth_subset,aes(factor(Nuclear),VmaxRaw, fill = Condition))+
        geom_rect(data=rect_df,
                  aes(fill=factor(Nuclear)), alpha=0.3,
                  xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)+
        scale_fill_manual(values=c("BC187"= "#56B4E9","L-1528"= "#56B4E9","UWOPS05-217.3" ="#CC79A7", "YPS606"="#009E73", "YPS128"="#009E73", "Y12" = "#D55E00","LS"="grey100","CSM30"="grey75","CSM37"="grey50"))+
        geom_boxplot()+ theme(legend.position = "none",axis.ticks.x=element_blank(), axis.text.x=element_blank())+xlab("")+ ylab("Vmax (arbitrary units)")+
        facet_grid(~factor(Nuclear), scales='free_x')
#Fig7C
      plot.vmax.shaded

      #petite LS 30 37
      rect_df2<-data.frame(Strain=unique(dfpet$Strain))
      rect_df2$Petitefrequency<-0
      head(rect_df2)
      plot.pet<-ggplot(dfpet,aes(x=Strain, y=Petitefrequency,fill=Condition))+geom_boxplot()
      plot.pet

      plot.pet.shaded <- ggplot(dfpet,aes(factor(Strain),Petitefrequency, fill = Condition))+
        geom_rect(data=rect_df2,
                  aes(fill=factor(Strain)), alpha=0.3,
                  xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)+
        scale_fill_manual(values=c("BC187"= "#56B4E9","L1528"= "#56B4E9","UWOPS052173" ="#CC79A7", "YPS606"="#009E73", "YPS128"="#009E73", "Y12" = "#D55E00","LS30"="grey100","CSM30"="grey75","CSM37"="grey50"))+
        geom_boxplot()+ theme(legend.position = "none",axis.ticks.x=element_blank(), axis.text.x=element_blank())+xlab("low sugar (white), 30C (light gray), 37C(dark gray)")+ ylab("Petite frequency (%)")+
        facet_grid(~factor(Strain), scales='free_x')
 #Fig7D
      plot.pet.shaded